home *** CD-ROM | disk | FTP | other *** search
- #
- # Property.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- import datetime
- from time import time as now
-
- from consts import *
- from utilities import *
- from emailAddress import *
- from checkIP import *
- from GlobalObjects import *
- from TestRecord import *
-
- import DNS
-
-
class Property(object):
    """Base class of all property tests.

    Attributes:
        name: the name of this property test (can contain spaces, in Unicode)
        testRecord: a TestRecord object
        recipientPattern: compiled regular expression built lazily from the
            pattern string given to the constructor, or None when that
            string is empty/None
        _rPat: the recipient pattern string exactly as given to the
            constructor

    Subclasses supply ``_run(msg)``; ``run`` wraps it with timing.  Calling
    ``run`` on an instance whose class has no ``_run`` raises the
    AttributeError produced by ``__getattr__``.
    """
    # improving performance by not having __dict__
    __slots__ = ('_rPat', 'name', 'testRecord', 'recipientPattern')

    def __init__(self, name, testRecord, recipientPattern):
        self.name = name
        self.testRecord = testRecord    # thread-safe
        # Only the source string is stored here; the 'recipientPattern' slot
        # stays unset until first access (see __getattr__).
        self._rPat = recipientPattern

    def __getattr__(self, name):
        """Lazily compile ``recipientPattern``; reject every other unknown name.

        This hook only runs when normal lookup fails, i.e. for an unset slot
        or a name the class does not define.
        """
        if name != 'recipientPattern':
            raise AttributeError('No attribute %s in this %s instance.' % (name, self.__class__.__name__))

        # NOTE(review): `re` is not imported by name in this module; it is
        # presumably supplied by one of the star imports - confirm.
        source = self._rPat
        if source:
            compiled = re.compile(source)
        else:
            compiled = None
        # Filling the slot means later reads bypass __getattr__ entirely.
        self.recipientPattern = compiled
        return compiled

    def changeRecipientPattern(self, newPattern):
        """Replace the compiled recipient pattern.

        ``newPattern`` is a regular expression string, or None to clear the
        pattern (the constructor accepts None as well, so it is accepted
        here instead of raising TypeError from re.compile).

        CAUTION: self._rPat is left unchanged.
        """
        if newPattern is None:
            self.recipientPattern = None
        else:
            self.recipientPattern = re.compile(newPattern)

    def run(self, msg):
        """Run a property test on the msg; returns tuple (result, float) where the result
        can be a bool (True = positive, False = negative) or a string (if the property is
        positive and has special info), and the float is the elapsed time spent on this
        property checking (in usec), measured with time.time()."""
        start = now()
        result = self._run(msg)
        elapsed = now() - start

        return result, elapsed * 1000000.0

    def contentString(self):
        """Return a Unicode description of the test's parameters (empty by default)."""
        return u''

    def getAttribute_(self, name):
        """This is basically for Obj-C side of PyObjC bridge so we can get at the instance variables"""
        return getattr(self, name)

    def setAttribute_withValue_(self, name, value):
        """This is basically for Obj-C side of PyObjC bridge so we can set an instance variable"""
        setattr(self, name, value)
-
-
class PropertyMessageMalformed(Property):
    """Positive iff ``msg.m`` is None or ``msg.containingNull`` is set.

    IMPORTANT: STOP DOING MORE TESTS IF THIS ONE IS POSITIVE!
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyMessageMalformed, self).__init__(
            u'Message is malformed/containing \'\\0\'', testRecord, recipientPattern)

    def _run(self, msg):
        # msg.containingNull is only consulted when msg.m is present.
        if msg.m is None:
            return True
        return msg.containingNull
-
-
class PropertyMessageIDMalformed(Property):
    """Positive iff the message ID is malformed or missing (``msg.mID`` is None)."""
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyMessageIDMalformed, self).__init__(
            u'Message ID is malformed/missing', testRecord, recipientPattern)

    def _run(self, msg):
        messageID = msg.mID
        return messageID is None
-
-
class PropertySenderMalformed(Property):
    """Positive iff the sender address is malformed.

    A positive result is ``msg.sender`` itself rather than True.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertySenderMalformed, self).__init__(
            u'Sender is malformed', testRecord, recipientPattern)

    def _run(self, msg):
        # Only the exact value False in msg.senderEmail counts as malformed.
        if msg.senderEmail is not False:
            return False
        return msg.sender
-
-
class PropertySubjectMissing(Property):
    """Positive iff the subject is missing (``msg.subject`` has length zero)."""
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertySubjectMissing, self).__init__(
            u'Subject is missing', testRecord, recipientPattern)

    def _run(self, msg):
        subjectLength = len(msg.subject)
        return subjectLength == 0
-
-
class PropertyDateMissing(Property):
    """Positive iff the date is missing (``msg.date`` is None)."""
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyDateMissing, self).__init__(
            u'Date is missing', testRecord, recipientPattern)

    def _run(self, msg):
        dateHeader = msg.date
        return dateHeader is None
-
-
class PropertyDateMalformed(Property):
    """Positive iff the date is malformed.

    The test reads the 'dateObj' attribute of the Message obj and is
    positive exactly when that attribute is the value False.
    NOTE(review): which values dateObj takes for a missing versus a
    well-formed date is decided by Message - confirm there.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyDateMalformed, self).__init__(
            u'Date is malformed', testRecord, recipientPattern)

    def _run(self, msg):
        parsedDate = msg.dateObj
        return parsedDate is False
-
-
class PropertyDateInFuture(Property):
    """Positive iff the date is in future.

    numMinutes: threshold in minutes
    timeDelta: the same threshold as a datetime.timedelta
    """
    # improving performance by not having __dict__
    __slots__ = ('numMinutes', 'timeDelta')

    def __init__(self, testRecord, recipientPattern, numMinutes):
        super(PropertyDateInFuture, self).__init__(
            u'Date is in future', testRecord, recipientPattern)
        threshold = datetime.timedelta(minutes=numMinutes)
        self.numMinutes = numMinutes
        self.timeDelta = threshold

    def _run(self, msg):
        # Negative when msg.timeDelta is None or below the threshold.
        if msg.timeDelta is None:
            return False
        if msg.timeDelta >= self.timeDelta:
            return 'Ahead %s' % msg.timeDelta
        return False

    def contentString(self):
        """Return the configured threshold as a Unicode description."""
        return u'Ahead: %d minute(s)' % self.numMinutes

    def setNumMinutes_(self, numMinutes):
        """Change the threshold; keeps numMinutes and timeDelta in step."""
        threshold = datetime.timedelta(minutes=numMinutes)
        self.numMinutes = numMinutes
        self.timeDelta = threshold
-
-
class PropertyDateInThePast(Property):
    """Positive iff the date is in the past.

    numDays: threshold in days
    timeDelta: the negated threshold as a datetime.timedelta
    """
    # improving performance by not having __dict__
    __slots__ = ('numDays', 'timeDelta')

    def __init__(self, testRecord, recipientPattern, numDays):
        super(PropertyDateInThePast, self).__init__(
            u'Date is in the past', testRecord, recipientPattern)
        threshold = datetime.timedelta(days=-numDays)
        self.numDays = numDays
        self.timeDelta = threshold

    def _run(self, msg):
        # The stored threshold is negative, so "at or below it" means
        # at least numDays behind.
        if msg.timeDelta is None:
            return False
        if msg.timeDelta <= self.timeDelta:
            return 'Behind %s' % -msg.timeDelta
        return False

    def contentString(self):
        """Return the configured threshold as a Unicode description."""
        return u'Behind: %d day(s)' % self.numDays

    def setNumDays_(self, numDays):
        """Change the threshold; keeps numDays and timeDelta in step."""
        threshold = datetime.timedelta(days=-numDays)
        self.numDays = numDays
        self.timeDelta = threshold
-
-
class PropertyRecipientMissing(Property):
    """Positive iff the recipient is missing (``msg.numRecipients`` equals 0)."""
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyRecipientMissing, self).__init__(
            u'Recipient is missing', testRecord, recipientPattern)

    def _run(self, msg):
        recipientCount = msg.numRecipients
        return recipientCount == 0
-
-
class PropertyRecipientsTooMany(Property):
    """Positive iff there are too many recipients.

    intArg: the recipient limit; the test is positive when
        msg.numRecipients exceeds it
    """
    __slots__ = ('intArg',)

    def __init__(self, testRecord, recipientPattern, recipientsLimit):
        super(PropertyRecipientsTooMany, self).__init__(
            u'Too many recipients', testRecord, recipientPattern)
        self.intArg = recipientsLimit

    def _run(self, msg):
        if msg.numRecipients > self.intArg:
            # A positive result carries the actual count.
            return '%d recipient(s)' % msg.numRecipients
        else:
            return False

    def contentString(self):
        """Return the configured limit as a Unicode description."""
        return u'> %d recipients' % self.intArg
-
-
class PropertyRecipientsMismatch(Property):
    """Positive iff no match is possible.

    The test is positive (True) when the message has at least one recipient,
    globalObjects.recipientPatterns is configured, and its match() returns
    False for every entry of msg.decodedRecipients.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        Property.__init__(self, u'Recipient(s) mismatch', testRecord, recipientPattern)

    def _run(self, msg):
        recipientPatterns = globalObjects.recipientPatterns
        if msg.numRecipients > 0 and recipientPatterns is not None:
            # NOTE: if recipientPatterns has not been configured, we won't report a positive here
            # (see SimplePatterns.match())
            #
            # An explicit loop replaces `not filter(...)`: it does not depend
            # on filter() returning a list, and it stops at the first
            # recipient that matches.
            for recipient in msg.decodedRecipients:
                if recipientPatterns.match(recipient) is not False:
                    return False
            return True
        return False
-
-
class PropertyRecipientMalformed(Property):
    """Positive iff at least one recipient is malformed.

    The condition is ``msg.numRecipients == -1``; the positive result is
    ``msg.recipients`` itself.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyRecipientMalformed, self).__init__(
            u'Recipient is malformed', testRecord, recipientPattern)

    def _run(self, msg):
        if msg.numRecipients != -1:
            return False
        # it's a string when some recipient address is malformed
        return msg.recipients
-
-
class PropertyTooFewKnownRecipients(Property):
    """Positive iff too few recipients are on user's address book.

    intArg: the minimum number of recipients that must be found in
        globalObjects.addressSet; fewer than that is a positive.

    Only messages with more than one recipient are examined.
    """
    # improving performance by not having __dict__
    __slots__ = 'intArg'

    def __init__(self, testRecord, recipientPattern, acquaintantLimit):
        Property.__init__(self, u'Too few known recipients', testRecord, recipientPattern)
        self.intArg = acquaintantLimit

    def _run(self, msg):
        if msg.numRecipients > 1:
            # Look the set up once instead of once per recipient.
            addressSet = globalObjects.addressSet
            # A list comprehension (rather than len(filter(...))) does not
            # depend on filter() returning a list.  Element [1] of each
            # recipient entry is what gets looked up in the address set.
            known = [r for r in msg.recipients if r[1] in addressSet]
            numAcquaintants = len(known)
            if numAcquaintants < self.intArg:
                return '%d acquaintant(s)' % numAcquaintants

        return False
-
-
class PropertyHTMLAttachment(Property):
    """Positive iff an HTML attachment exists; the result is ``msg.isHTML`` as-is."""
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyHTMLAttachment, self).__init__(
            u'HTML attachment', testRecord, recipientPattern)

    def _run(self, msg):
        htmlFlag = msg.isHTML
        return htmlFlag
-
-
class PropertyHTMLBadTags(Property):
    """Positive iff an HTML attachment has too many bad tags.

    intArg: the limit; the test is positive when the length of
        msg.htmlBody.badTagList reaches or exceeds it
    """
    # improving performance by not having __dict__
    __slots__ = ('intArg',)

    def __init__(self, testRecord, recipientPattern, badTagLimit):
        super(PropertyHTMLBadTags, self).__init__(
            u'HTML has too many bad tags', testRecord, recipientPattern)
        self.intArg = badTagLimit

    def _run(self, msg):
        # Non-HTML messages are never positive; htmlBody is not touched for them.
        if not msg.isHTML:
            return False

        badTagCount = len(msg.htmlBody.badTagList)
        if badTagCount >= self.intArg:
            return '%d bad tag(s)' % badTagCount
        return False

    def contentString(self):
        """Return the configured limit as a Unicode description."""
        return u'>= %d bad tag(s)' % self.intArg
-
-
class PropertyHTMLHiddenURLs(Property):
    """Positive iff an HTML attachment has too many hidden URLs.

    intArg: the limit; the test is positive when the length of
        msg.htmlBody.hiddenURLList reaches or exceeds it
    """
    # improving performance by not having __dict__
    __slots__ = ('intArg',)

    def __init__(self, testRecord, recipientPattern, hiddenURLLimit):
        super(PropertyHTMLHiddenURLs, self).__init__(
            u'HTML has too many hidden URLs', testRecord, recipientPattern)
        self.intArg = hiddenURLLimit

    def _run(self, msg):
        # Non-HTML messages are never positive; htmlBody is not touched for them.
        if not msg.isHTML:
            return False

        hiddenURLCount = len(msg.htmlBody.hiddenURLList)
        if hiddenURLCount >= self.intArg:
            return '%d hidden URL(s)' % hiddenURLCount
        return False

    def contentString(self):
        """Return the configured limit as a Unicode description."""
        return u'>= %d hidden URL(s)' % self.intArg
-
-
class PropertyHTMLVacuousTags(Property):
    """Positive iff an HTML attachment has too many vacuous tags.

    intArg: the limit; the test is positive when the length of
        msg.htmlBody.vacuousTagList reaches or exceeds it
    """
    # improving performance by not having __dict__
    __slots__ = ('intArg',)

    def __init__(self, testRecord, recipientPattern, vacuousTagLimit):
        super(PropertyHTMLVacuousTags, self).__init__(
            u'HTML has too many vacuous tags', testRecord, recipientPattern)
        self.intArg = vacuousTagLimit

    def _run(self, msg):
        # Non-HTML messages are never positive; htmlBody is not touched for them.
        if not msg.isHTML:
            return False

        vacuousTagCount = len(msg.htmlBody.vacuousTagList)
        if vacuousTagCount >= self.intArg:
            return '%d vacuous tag(s)' % vacuousTagCount
        return False

    def contentString(self):
        """Return the configured limit as a Unicode description."""
        return u'>= %d vacuous tag(s)' % self.intArg
-
-
class PropertyBlankRendering(Property):
    """Positive iff an email renders nothing (either text or HTML).

    For HTML messages msg.rendering is measured, otherwise msg.body.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyBlankRendering, self).__init__(
            u'Blank rendering', testRecord, recipientPattern)

    def _run(self, msg):
        # Pick the text to measure first, then do a single emptiness check.
        if msg.isHTML:
            visibleText = msg.rendering
        else:
            visibleText = msg.body
        return len(visibleText) == 0
-
-
class PropertyHasBadSites(Property):
    """Positive iff an email refers to a bad site.

    The positive result is ``msg.badSite`` itself; None there means negative.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        super(PropertyHasBadSites, self).__init__(
            u'Has bad site(s)', testRecord, recipientPattern)

    def _run(self, msg):
        if msg.badSite is None:
            return False
        return msg.badSite
-
-
class PropertyOpenRelay(Property):
    """Positive iff any IP in the headers, except for the safe IPs, is blacklisted.

    timeout: passed through to checkIPList; contentString formats it as
        seconds
    numLastIPs: how many of the trailing non-safe header IPs are checked
    blackLists: passed through to checkIPList
    """
    # improving performance by not having __dict__
    __slots__ = ('timeout', 'numLastIPs', 'blackLists')

    def __init__(self, testRecord, recipientPattern, timeout, numLastIPs, blackLists):
        Property.__init__(self, u'Open relay', testRecord, recipientPattern)
        self.timeout = timeout
        self.numLastIPs = numLastIPs
        self.blackLists = blackLists

    def _run(self, msg):
        if len(msg.headerIPs):
            # NOTE: if safeIPs has not been configured, we check every IP
            # (see SimplePatterns.match())
            safeIPs = globalObjects.safeIPs
            # Build a real list so the slice below does not depend on
            # filter() returning a list.
            candidateIPs = [ip for ip in msg.headerIPs if safeIPs.match(ip) is False]
            # NOTE: with numLastIPs == 0 the slice [-0:] is the whole list,
            # so every candidate IP is checked in that case.
            result = checkIPList(candidateIPs[-self.numLastIPs:],
                                 self.timeout, self.blackLists)

            if result is not None:
                return '%s (%s)' % (result[0], result[1])

        return False

    def contentString(self):
        """Return the configured IP count and timeout as a Unicode description."""
        return u'Check last %d IPs with timeout %.1f sec' % (self.numLastIPs, self.timeout)
-
-
class PropertyDomainNoMX(Property):
    """Positive iff the domain of the sender's email address has no MX record.

    The positive result is msg.senderDomain itself.  A DNS.DNSError other
    than 'no working nameservers found' (a timeout, for instance) also
    counts as positive; any other failure of the lookup is treated as
    negative.
    """
    __slots__ = ()

    def __init__(self, testRecord, recipientPattern):
        Property.__init__(self, u'Domain has no MX record or bogus', testRecord, recipientPattern)

    def _run(self, msg):
        domain = msg.senderDomain
        if not domain:
            return False

        try:
            # TO-DO: is PyDNS thread-safe?
            DNS.DiscoverNameServers()
            request = DNS.DnsRequest(domain, qtype='mx',
                                     timeout=DEFAULT_MX_TIMEOUT)
            # Only the number of answers matters, so it is counted directly.
            if len(request.req().answers) == 0:
                return domain

        except DNS.DNSError as e:
            # if there's no DNS hosts for lookup, assume domain has MX
            # but timeout is NOT ok
            if str(e) != 'no working nameservers found':
                return domain

        except Exception:
            # for unknown reasons the DNS query fails (maybe due to bad setup);
            # best effort: treat as negative.  Catching Exception rather than
            # everything lets KeyboardInterrupt/SystemExit propagate.
            pass

        return False
-
class PropertyPhishingURL(Property):
    """Positive iff an HTML email contains at least a phishing URL.

    checkWhitelistedEmail: stored as given; not read by _run itself
    """
    # improving performance by not having __dict__
    __slots__ = ('checkWhitelistedEmail',)

    def __init__(self, testRecord, recipientPattern, checkWhitelistedEmail):
        super(PropertyPhishingURL, self).__init__(
            u'Has a phishing URL', testRecord, recipientPattern)
        self.checkWhitelistedEmail = checkWhitelistedEmail

    def _run(self, msg):
        found = msg.phishingURL
        if not found:
            return False

        # Report the last entry; its first two elements fill the message.
        lastEntry = found[-1]
        return u'%s claimed to be %s' % (lastEntry[0], lastEntry[1])
-
if __name__ == '__main__':
    # Command-line self test: run every property test against one raw email
    # file and print the positive ones together with their timing.
    import sys

    if len(sys.argv) == 1:
        print('Usage: ./Property.py <filename>')
        print(' * filename is the name of the file containing email raw source.')
        sys.exit(1)

    from Message import *

    oldSiteDBSize = globalObjects.siteDB.size()

    # Close the input file explicitly instead of leaving it to the
    # garbage collector.
    sourceFile = open(sys.argv[1])
    try:
        msg = Message(sourceFile.read())
    finally:
        sourceFile.close()

    # PropertyMessageMalformed must be the *first* thing to test
    # all the other tests can proceed only if PropertyMessageMalformed is negative
    p = PropertyMessageMalformed(TestRecord(), None)
    result, cpuTime = p.run(msg)
    if result:
        print('* %s' % p.name)

    else:
        properties = [PropertyMessageIDMalformed(TestRecord(), None),
                      PropertySenderMalformed(TestRecord(), None),
                      PropertySubjectMissing(TestRecord(), None),
                      PropertyDateMissing(TestRecord(), None),
                      PropertyDateMalformed(TestRecord(), None),
                      PropertyDateInFuture(TestRecord(), None, 120),
                      PropertyDateInThePast(TestRecord(), None, 2),
                      PropertyRecipientMissing(TestRecord(), None),
                      PropertyRecipientsTooMany(TestRecord(), None, 5),
                      PropertyRecipientsMismatch(TestRecord(), None),
                      PropertyRecipientMalformed(TestRecord(), None),
                      PropertyTooFewKnownRecipients(TestRecord(), None, 1),
                      PropertyHTMLAttachment(TestRecord(), None),
                      PropertyHTMLBadTags(TestRecord(), None, 5),
                      PropertyHTMLHiddenURLs(TestRecord(), None, 1),
                      PropertyHTMLVacuousTags(TestRecord(), None, 3),
                      PropertyBlankRendering(TestRecord(), None),
                      PropertyHasBadSites(TestRecord(), None),
                      PropertyOpenRelay(TestRecord(), None, 0.5, 3, ['bl.spamcop.net']),
                      PropertyDomainNoMX(TestRecord(), None)]

        for r in properties:
            result, cpuTime = r.run(msg)
            # Anything other than the exact value False is a positive; a
            # non-True positive carries extra info to display.
            if result is not False:
                if result is True:
                    s = '* %s' % r.name
                else:
                    s = encodeText('* %s (%s)' % (r.name, result))

                print('%s: %f usec(s)' % (s, cpuTime))

        # NOTE(review): the site DB update and its report run only for
        # well-formed messages here - confirm this is the intended nesting.
        msg.addSites()
        print('* SiteDB count change: %s' % (globalObjects.siteDB.size() - oldSiteDBSize,))
-